# Authors:	        	Carlos F. Gould; Xiaoxue Hou
# Institution:	    	Columbia University; Johns Hopkins University
# File Date: 	      	August 1, 2020
# Publication title:	Jointly modeling the adoption and use of LPG in rural India
# File purpose:	    	Make Figure 1 in the main text

# 1. Libraries ------


library(tidyverse)
library(directlabels)
library(scales)
library(lubridate)

library(readxl)
library(haven)

library(ggthemes)
library(ggpubr)
library(ggsci)
library(gghighlight)

library(ggiraphExtra)
library(ggiraph)
library(gridExtra)


# 2. Read in the data ------

# Data were compiled from the data sources described in text:
# National Sample Survey
# National family health survey - 4
# ACCESS Survey

full_rural <- read_excel("~/Dropbox/State_CleanFuel_1993_2018.xlsx")

# 3. Summarize statistics state-wise -----

full_rural <- full_rural %>%
  gather(Year, PercentLPG, -State)

full_rural[full_rural=="1993"] <- "1993-01-01"
full_rural[full_rural=="1999"] <- "2000-01-01"
full_rural[full_rural=="2005"] <- "2005-01-01"
full_rural[full_rural=="2010"] <- "2010-01-01"
full_rural[full_rural=="2012"] <- "2012-01-01"
full_rural[full_rural=="2015"] <- "2015-06-01"
full_rural[full_rural=="2016"] <- "2016-01-01"
full_rural[full_rural=="2018"] <- "2018-06-01"

full_rural$Year <- ymd(full_rural$Year)

full_rural$PercentLPG <- full_rural$PercentLPG / 100

full_rural <- full_rural[complete.cases(full_rural),]

full_rural$Source <- c(rep("NSS", 81), rep("ACCESS", 6), rep("NFHS-4", 17), rep("ACCESS", 6))

study_state <- c("Bihar", "Madhya Pradesh", "Odisha", "Uttar Pradesh", "West Bengal", "Jharkhand")

full_rural$study_state_val <- as.numeric(ifelse((full_rural$State %in% study_state)==TRUE, 1, 0))

# Separate study states
state_lpg_study <- full_rural %>%
  filter(State %in% study_state)

state_lpg_not_study <- full_rural %>%
  filter(!(State %in% study_state))

# Create state-wise subsets
bihar <- full_rural %>%
  filter(State=="Bihar")
mp <- full_rural %>%
  filter(State=="Madhya Pradesh")
odisha <- full_rural %>%
  filter(State=="Odisha")
up <- full_rural %>%
  filter(State=="Uttar Pradesh")
wb <- full_rural %>%
  filter(State=="West Bengal")
jharkhand <- full_rural %>%
  filter(State=="Jharkhand")



# 4. Make figures -------

# PMUY label
pmuy_fig <- ggplot() + 
  annotate("text", x = ymd("2016-9-01"), y = -.05, size = 8, label = "PMUY", face="bold") +
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  labs(title = expression(""))  + 
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(axis.line = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.background = element_blank()) + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-0.05,0.05), clip = "off")
pmuy_fig

# Bihar
bihar_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-06-27"), xend=ymd("2016-06-27"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  annotate("text", x = ymd("2016-09-01"), y = 1.1, size = 10, label = "PMUY", face="bold") +
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = bihar, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = bihar, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "Bihar") +
  annotate("text", x = ymd("1991-11-01"), y = 0.09, size = 10, label = "<1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.56, size = 10, label = "53%") +
  labs(title = expression(""))  + 
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(plot.title = element_text(size=24),
        plot.subtitle = element_text(size=18),
        axis.text.x = element_blank(),
        plot.background = element_blank(),
        axis.text.y = element_text(size=20),
        axis.title = element_text(size=20), 
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none",
        plot.margin = unit(c(0, 2, 0, 0), "cm")) + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05), clip = "off")

bihar_fig

# Madhya Pradesh
mp_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-07-04"), xend=ymd("2016-07-04"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = mp, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = mp, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "Madhya Pradesh") +
  annotate("text", x = ymd("1991-11-01"), y = 0.09, size = 10, label = "1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.50, size = 10, label = "46%") +
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(plot.title = element_text(size=20),
        plot.subtitle = element_text(size=18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=20),
        axis.title = element_text(size=20), 
        plot.background = element_blank(),
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none") + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05))

# Odisha 
odisha_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-06-20"), xend=ymd("2016-06-20"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = odisha, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = odisha, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "Odisha") +
  annotate("text", x = ymd("1991-11-01"), y = 0.09, size = 10, label = "<1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.54, size = 10, label = "50%") +
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(plot.title = element_text(size=20),
        plot.subtitle = element_text(size=18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=20),
        plot.background = element_blank(),
        axis.title = element_text(size=20), 
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none") + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05))

# West Bengal 
wb_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-09-14"), xend=ymd("2016-09-14"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = wb, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = wb, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "West Bengal") +
  annotate("text", x = ymd("1991-11-01"), y = 0.09, size = 10, label = "<1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.73, size = 10, label = "69%") +
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(plot.title = element_text(size=20),
        plot.subtitle = element_text(size=18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=20),
        axis.title = element_text(size=20), 
        plot.background = element_blank(),
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none") + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05))

# Uttar Pradesh
up_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-05-01"), xend=ymd("2016-05-01"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = up, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = up, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "Uttar Pradesh") +
  annotate("text", x = ymd("1992-01-01"), y = 0.09, size = 10, label = "1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.67, size = 10, label = "63%") +
  ylab("") + 
  xlab("") +
  theme_clean() +
  theme(plot.title = element_text(size=20),
        plot.subtitle = element_text(size=18),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=20),
        plot.background = element_blank(),
        axis.title = element_text(size=20), 
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none") + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05))

# Jharkhand
jharkhand_fig <- ggplot() + 
  geom_segment(aes(x= ymd("2016-10-19"), xend=ymd("2016-10-19"), y=-0.05, yend=1), color = "grey40", linetype = 2, size = 1.1) + 
  scale_x_date(breaks = ymd(c("1990-01-01","1995-01-01","2000-01-01", "2005-01-01","2010-01-01","2015-01-01","2020-01-01")),
               labels = date_format("%Y", tz = "CET")) +
  scale_y_continuous(labels = scales::percent) + 
  geom_point(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.4) + 
  geom_line(data = state_lpg_not_study, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="grey", alpha=0.5) + 
  geom_point(data = jharkhand, aes(x=Year, y=PercentLPG, shape = Source), color="dodgerblue4", size = 4) + 
  geom_line(data = jharkhand, aes(x=Year, y=PercentLPG, shape = Source, group = State), color="dodgerblue4", size = 1.25) + 
  annotate("text", x = ymd("1995-06-01"), y = 0.75, size = 12, label = "Jharkhand") +
  annotate("text", x = ymd("2004-01-01"), y = 0.09, size = 10, label = "1%") +
  annotate("text", x = ymd("2020-06-01"), y = 0.36, size = 10, label = "36%") +
  ylab("") + 
  theme_clean() +
  theme(plot.title = element_text(size=20),
        plot.subtitle = element_text(size=18),
        axis.text = element_text(size=20),
        axis.text.x = element_text(size=32),
        axis.title.x = element_blank(), 
        plot.background = element_blank(),
        legend.text = element_text(size=16),
        legend.title = element_blank(),
        legend.position = "none") + 
  coord_cartesian(xlim = c(ymd("1991-01-01", ymd("2021-01-01"))), ylim =c(-.05,1.05))


# Make y-axis
y_axis <- text_grob("Percent of Rural Households Cooking with LPG", rot = 90, vjust=1.5,
                    size = 36)

# 5. Compile and save figure ------
# png("~/Dropbox/StateLPGTransition_Facet_Rural.png",
#     height = 1000, width = 1000)
# 
# gridExtra::grid.arrange(y_axis,
#                         plot_grid(
#                           bihar_fig, mp_fig, odisha_fig, up_fig, wb_fig, jharkhand_fig,
#                           rel_heights = c(1, 1, 1, 1, 1, 1),
#                           ncol=1,
#                           align = "v"),
#                         widths=grid::unit.c(unit(5.5, "lines"), unit(1, "npc")))
# 
# dev.off()

